Treg in JIA dataset from Adam Croft's data¶

In [23]:
suppressPackageStartupMessages({
    library(Seurat)
    library(dplyr)
    library(data.table)
    library(ggplot2)
    library(patchwork)
    library(harmony)
    library(symphony)
    library(presto)
    library(singlecellmethods)
    library(purrr)
    library(pheatmap)
    library(ggthemes)
})

set.seed(1)
source("utils.r")
source("myfun.r")
In [2]:
data.path <- "/data/brennerlab/Shani/projects/Treg/data/external_datasets/JIA_Croft/"
In [3]:
list.files(data.path)
  1. 'MAPJAG.RData'
  2. 'object'
  3. 'umap_embeddings.rds'
In [4]:
metadata <- read.table(paste0(data.path, "object/meta.tsv"))
head(metadata)   
# meta.donor <- read.csv(paste0(data.path, "all.meta.CD.csv")) %>% group_by(biosample_id) %>% distinct() 
# head(meta.donor)  
dim(metadata)
A data.frame: 6 × 40
orig.identnCount_RNAnFeature_RNApercent.mtnCount_ProteinnFeature_ProteinT_clonotype_idT_cdr3s_aaB_clonotype_idB_cdr3s_aa⋯global1onsetmaxjoint0623label15simple15clusters2312oldclusterslabel15ssubclustersmain_clusters
<chr><int><int><dbl><int><int><chr><chr><chr><chr>⋯<chr><int><int><chr><chr><chr><chr><chr><chr><chr>
AAACCTGAGACAGGCT-1_1PBMC1243710092.379975263 93clonotype3221TRB:CAISQTSATYEQYF NANA⋯T cell 5181CD4+ T T cells CD4+ naive/central memory TCD4+ naive CD4+ T cellsCD4+ naive/central memory TCD4+ T cells
AAACCTGAGACATAAC-1_1PBMC1318014287.106918351103clonotype7162TRB:CASSVDLAGATDTQYF NANA⋯T cell 5181CD4+ T T cells CD4+ KLRB1+ memory T CD4+ KLRB1+ memoryCD4+ T cellsCD4+ KLRB1+ memory T CD4+ T cells
AAACCTGAGAGGTAGA-1_1PBMC1318914862.759486289 90NA NA NANA⋯NK/yd T5181NK cells NK cellsIFNG+ NK IFNG+ NK NK cells IFNG+ NK NK cells
AAACCTGAGCCCTAAT-1_1PBMC12057 8844.472533297 89clonotype5326TRB:CASSYLAGGTNEQFF NANA⋯T cell 5181CD4+ T T cells CD4+ KLRB1+ memory T CD4+ KLRB1+ memoryCD4+ T cellsCD4+ KLRB1+ memory T CD4+ T cells
AAACCTGAGCCGCCTA-1_1PBMC12645 9812.684310292 95clonotype6751TRB:CASSPDRVLSAEAFF NANA⋯T cell 5181CD4+ T T cells CD4+ naive/central memory TCD4+ naive CD4+ T cellsCD4+ naive/central memory TCD4+ T cells
AAACCTGAGCGTTCCG-1_1PBMC12306 9512.385082223 86clonotype9493TRB:CASSYPRERHSGANVLTF;TRA:CAVNRDDKIIFNANA⋯T cell 5181Naive CD8+ TT cells CD8+ naive T CD8+ naive CD8+ T cellsCD8+ naive T CD8+ T cells
  1. 242190
  2. 40
In [ ]:
unique(metadata$main_clusters)
unique(metadata$subclusters)
unique(metadata$label15)
In [ ]:
metadata %>% filter(subclusters == "CD4+ FOXP3+ Tregs") %>% dim
metadata %>% filter(main_clusters == "CD4+ T cells") %>% dim

Load data & organize obj¶

In [ ]:
counts <- Read10X(paste0(data.path, "/object/"))
In [ ]:
obj <- CreateSeuratObject(counts = counts,  project = "JIA_Croft", min.cells=0, min.features=0)
obj

Edit metadata¶

In [17]:
obj@meta.data%>% head
A data.frame: 6 × 3
orig.identnCount_RNAnFeature_RNA
<fct><dbl><int>
AAACCTGAGACAGGCT-1_1JIA_Croft24371009
AAACCTGAGACATAAC-1_1JIA_Croft31801428
AAACCTGAGAGGTAGA-1_1JIA_Croft31891486
AAACCTGAGCCCTAAT-1_1JIA_Croft2057 884
AAACCTGAGCCGCCTA-1_1JIA_Croft2645 981
AAACCTGAGCGTTCCG-1_1JIA_Croft2306 951
In [18]:
mt <- obj@meta.data %>% tibble::rownames_to_column("cellID") %>% 
left_join(metadata %>% tibble::rownames_to_column("cellID"), by= join_by(cellID == cellID))

obj <- AddMetaData(obj, mt)
obj@meta.data %>% head
A data.frame: 6 × 47
orig.identnCount_RNAnFeature_RNAcellIDorig.ident.xnCount_RNA.xnFeature_RNA.xorig.ident.ynCount_RNA.ynFeature_RNA.y⋯global1onsetmaxjoint0623label15simple15clusters2312oldclusterslabel15ssubclustersmain_clusters
<fct><dbl><int><chr><fct><dbl><int><chr><int><int>⋯<chr><int><int><chr><chr><chr><chr><chr><chr><chr>
AAACCTGAGACAGGCT-1_1JIA_Croft24371009AAACCTGAGACAGGCT-1_1JIA_Croft24371009PBMC124371009⋯T cell 5181CD4+ T T cells CD4+ naive/central memory TCD4+ naive CD4+ T cellsCD4+ naive/central memory TCD4+ T cells
AAACCTGAGACATAAC-1_1JIA_Croft31801428AAACCTGAGACATAAC-1_1JIA_Croft31801428PBMC131801428⋯T cell 5181CD4+ T T cells CD4+ KLRB1+ memory T CD4+ KLRB1+ memoryCD4+ T cellsCD4+ KLRB1+ memory T CD4+ T cells
AAACCTGAGAGGTAGA-1_1JIA_Croft31891486AAACCTGAGAGGTAGA-1_1JIA_Croft31891486PBMC131891486⋯NK/yd T5181NK cells NK cellsIFNG+ NK IFNG+ NK NK cells IFNG+ NK NK cells
AAACCTGAGCCCTAAT-1_1JIA_Croft2057 884AAACCTGAGCCCTAAT-1_1JIA_Croft2057 884PBMC12057 884⋯T cell 5181CD4+ T T cells CD4+ KLRB1+ memory T CD4+ KLRB1+ memoryCD4+ T cellsCD4+ KLRB1+ memory T CD4+ T cells
AAACCTGAGCCGCCTA-1_1JIA_Croft2645 981AAACCTGAGCCGCCTA-1_1JIA_Croft2645 981PBMC12645 981⋯T cell 5181CD4+ T T cells CD4+ naive/central memory TCD4+ naive CD4+ T cellsCD4+ naive/central memory TCD4+ T cells
AAACCTGAGCGTTCCG-1_1JIA_Croft2306 951AAACCTGAGCGTTCCG-1_1JIA_Croft2306 951PBMC12306 951⋯T cell 5181Naive CD8+ TT cells CD8+ naive T CD8+ naive CD8+ T cellsCD8+ naive T CD8+ T cells
In [25]:
unique(colnames(obj@meta.data))
  1. 'orig.ident'
  2. 'nCount_RNA'
  3. 'nFeature_RNA'
  4. 'cellID'
  5. 'orig.ident.x'
  6. 'nCount_RNA.x'
  7. 'nFeature_RNA.x'
  8. 'orig.ident.y'
  9. 'nCount_RNA.y'
  10. 'nFeature_RNA.y'
  11. 'percent.mt'
  12. 'nCount_Protein'
  13. 'nFeature_Protein'
  14. 'T_clonotype_id'
  15. 'T_cdr3s_aa'
  16. 'B_clonotype_id'
  17. 'B_cdr3s_aa'
  18. 'scrublet_score'
  19. 'integrated_snn_res.0.1'
  20. 'integrated_snn_res.0.15'
  21. 'seurat_clusters'
  22. 'integrated_snn_res.0.12'
  23. 'percent.ribro'
  24. 'percent.Hb'
  25. 'S.Score'
  26. 'G2M.Score'
  27. 'Phase'
  28. 'old.ident'
  29. 'patient'
  30. 'TYPE'
  31. 'sample'
  32. 'sex'
  33. 'batch'
  34. 'age'
  35. 'condition'
  36. 'njoint'
  37. 'ana'
  38. 'global1'
  39. 'onset'
  40. 'maxjoint0623'
  41. 'label15'
  42. 'simple15'
  43. 'clusters2312'
  44. 'oldclusters'
  45. 'label15s'
  46. 'subclusters'
  47. 'main_clusters'
  48. 'percent.mito'
In [155]:
unique(obj$global1)
unique(obj$Phase)
unique(obj$TYPE)
unique(obj$sample)
unique(obj$global1)
'T cell'
  1. 'G1'
  2. 'S'
  3. 'G2M'
  1. 'Blood'
  2. 'SF'
  3. 'Tissue'
  1. 'B91'
  2. 'B84'
  3. 'B95'
  4. 'B811'
  5. 'B88'
  6. 'B98'
  7. 'B97'
  8. 'B814'
  9. 'B817'
  10. 'SF91'
  11. 'SF84'
  12. 'SF95'
  13. 'SF811'
  14. 'SF88'
  15. 'SF98'
  16. 'SF97'
  17. 'SF814'
  18. 'SF817'
  19. 'T95'
  20. 'T84'
  21. 'T811'
  22. 'T88'
  23. 'T98'
  24. 'T814'
  25. 'T817'
  26. 'T910'
  27. 'T96'
  28. 'T99'
'T cell'
In [157]:
donor_info <- metadata %>% select(patient, TYPE, sample, sex, batch, age, condition, njoint, ana) %>% unique()
donor_info
A data.frame: 28 × 9
patientTYPEsamplesexbatchageconditionnjointana
<int><chr><chr><chr><chr><dbl><chr><int><chr>
AAACCTGAGACAGGCT-1_1 91Blood B91 Fbatch1 6.9P-Oligo 1N
AAACCTGAGACTCGGA-1_2 84Blood B84 Fbatch115.8Poly-neg24N
AAACCTGAGACCACGA-1_3 95Blood B95 Mbatch1 3.7P-Oligo 4N
AAACCTGAGGTGATTA-1_4811Blood B811 Mbatch210.8P-Oligo 1N
AAACCTGAGGATGCGT-1_5 88Blood B88 Fbatch215.7RF+Poly 23N
AAACCTGAGACAGGCT-1_6 98Blood B98 Fbatch2 9.3P-Oligo 3Y
AAACCTGAGGACCACA-1_7 97Blood B97 Fbatch2 1.2Poly-neg 7Y
AAACCTGAGAGTGAGA-1_8814Blood B814 Mbatch314.8PsA 3NR
AAACCTGAGAGAGCTC-1_9817Blood B817 Fbatch3 9.3P-Oligo 1Y
AAACCTGAGCGTGAAC-1_10 91SF SF91 Fbatch1 6.9P-Oligo 1N
AAACCTGAGACACGAC-1_11 84SF SF84 Fbatch115.8Poly-neg24N
AAACCTGAGCAACGGT-1_12 95SF SF95 Mbatch1 3.7P-Oligo 4N
AAACCTGAGGAATGGA-1_13811SF SF811Mbatch210.8P-Oligo 1N
AAACCTGAGAGCTATA-1_14 88SF SF88 Fbatch215.7RF+Poly 23N
AAACCTGAGCCACTAT-1_15 98SF SF98 Fbatch2 9.3P-Oligo 3Y
AAACCTGAGAATCTCC-1_16 97SF SF97 Fbatch2 1.2Poly-neg 7Y
AAACCTGTCAGAGACG-1_17814SF SF814Mbatch314.8PsA 3NR
AAACCTGAGAAGGTTT-1_18817SF SF817Fbatch3 9.3P-Oligo 1Y
AAACCTGAGAAGGACA-1_19 95TissueT95 Mbatch2 3.7P-Oligo 4N
AAACCTGAGAATTCCC-1_20 84TissueT84 Fbatch215.8Poly-neg24N
AAACCTGAGTTTCCTT-1_21811TissueT811 Mbatch210.8P-Oligo 1N
AAACCTGAGACCTAGG-1_22 88TissueT88 Fbatch215.7RF+Poly 23N
AAACCTGAGAAGGTGA-1_23 98TissueT98 Fbatch2 9.3P-Oligo 3Y
AAACCTGAGCAATATG-1_24814TissueT814 Mbatch314.8PsA 3NR
AAACCTGAGAGAGCTC-1_25817TissueT817 Fbatch3 9.3P-Oligo 1Y
AAACCTGAGACTCGGA-1_26910TissueT910 Fbatch3 3.0Ex-Oligo 1Y
AAACCTGAGAATTCCC-1_27 96TissueT96 Fbatch3 4.8PsA 1N
AAACCTGAGAGGACGG-1_28 99TissueT99 Fbatch3 2.6P-Oligo 2NR
In [160]:
table(donor_info$batch, donor_info$patient)
table(donor_info$patient, donor_info$TYPE)
        
         84 88 91 95 96 97 98 99 811 814 817 910
  batch1  2  0  2  2  0  0  0  0   0   0   0   0
  batch2  1  3  0  1  0  2  3  0   3   0   0   0
  batch3  0  0  0  0  1  0  0  1   0   3   3   1
     
      Blood SF Tissue
  84      1  1      1
  88      1  1      1
  91      1  1      0
  95      1  1      1
  96      0  0      1
  97      1  1      0
  98      1  1      1
  99      0  0      1
  811     1  1      1
  814     1  1      1
  817     1  1      1
  910     0  0      1
In [21]:
# Keep T cells only
Idents(obj) <- "global1"
obj <- subset(obj, ident = "T cell")

Basic QC & preproccessing¶

In [22]:
rownames(obj)[grep(pattern = "^MT-", rownames(obj))]
  1. 'MT-ND1'
  2. 'MT-ND2'
  3. 'MT-CO1'
  4. 'MT-CO2'
  5. 'MT-ATP8'
  6. 'MT-ATP6'
  7. 'MT-CO3'
  8. 'MT-ND3'
  9. 'MT-ND4L'
  10. 'MT-ND4'
  11. 'MT-ND5'
  12. 'MT-ND6'
  13. 'MT-CYB'
In [26]:
obj[["percent.mito"]] <- PercentageFeatureSet(obj, pattern = "^MT-")
VlnPlot(obj, features = "percent.mito", pt.size = 0.0, group.by = "patient") + theme(legend.position = "none") + ggtitle(NULL) +
  xlab(NULL) + ylab("% mitochondrial genes") + geom_hline(yintercept=15)

VlnPlot(obj, features = "percent.mito", pt.size = 0.0, group.by = "batch") + theme(legend.position = "none") + ggtitle(NULL) +
  xlab(NULL) + ylab("% mitochondrial genes") + geom_hline(yintercept=15)
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`

Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`

In [27]:
#Gene number
VlnPlot(obj, features = "nFeature_RNA", pt.size = 0.0, group.by = "patient") + theme(legend.position = "none") + ggtitle(NULL) +
  xlab(NULL) + ylab("Number of Genes")
VlnPlot(obj, features = "nFeature_RNA", pt.size = 0.0, group.by = "batch") + theme(legend.position = "none") + ggtitle(NULL) +
  xlab(NULL) + ylab("Number of Genes")

VlnPlot(obj, features = "nCount_RNA", pt.size = 0.0, group.by = "patient") + theme(legend.position = "none") + ggtitle(NULL) +
  xlab(NULL) + ylab("nUMI")
VlnPlot(obj, features = "nCount_RNA", pt.size = 0.0, group.by = "batch") + theme(legend.position = "none") + ggtitle(NULL) +
  xlab(NULL) + ylab("nUMI")
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`

Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`

Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`

Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`

In [ ]:
fig.size(8,8)
suppressMessages(
    obj@meta.data %>% 
  	ggplot(aes(x=nCount_RNA, y=nFeature_RNA, color=percent.mito)) + 
  	geom_point() + 
	scale_colour_gradient(low = "gray90", high = "black") +
    facet_wrap(~orig.ident) +
  	stat_smooth(method=lm) +
  	scale_x_log10() + 
  	scale_y_log10() + 
  	theme_classic() +
  	geom_vline(xintercept = 500) +
  	geom_hline(yintercept = 250) +
    theme(text = element_text(size = 14)) 
    )
In [29]:
#Data already filtered
# obj <- subset(obj, subset = nCount_RNA > 500 & nFeature_RNA > 250 &  percent.mito < 20)
In [30]:
#initial dimensionality reduction on all samples to identify contaminating clusters
obj <- NormalizeData(obj, normalization.method = "LogNormalize", scale.factor = 10000, verbose = F) %>% 
    FindVariableFeatures(selection.method = "vst", nfeatures = 2000, verbose = F) %>% 
    ScaleData() %>% 
    RunPCA(verbose = F)
Centering and scaling data matrix

In [35]:
set.seed(1)
obj <- RunHarmony(obj, group.by.vars=c("batch", "patient", "TYPE"), reduction.use = "pca", 
                 plot_convergence = TRUE, max_iter = 10,
                 early_stop = F)
Transposing data matrix

Initializing state using k-means centroids initialization

Harmony 1/10

Harmony 2/10

Harmony 3/10

Harmony 4/10

Harmony 5/10

Harmony 6/10

Harmony 7/10

Harmony 8/10

Harmony 9/10

Harmony 10/10

In [36]:
obj
An object of class Seurat 
30803 features across 100098 samples within 1 assay 
Active assay: RNA (30803 features, 2000 variable features)
 3 layers present: counts, data, scale.data
 2 dimensional reductions calculated: pca, harmony
In [37]:
set.seed(1)
obj <- Run_uwot_umap(obj)
obj <- FindClusters(obj, graph.name = 'humap_fgraph', resolution = 1, verbose = TRUE)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 100098
Number of edges: 1234828

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8233
Number of communities: 14
Elapsed time: 14 seconds
In [39]:
fig.size(8,8)
DimPlot(obj, label = T, repel = T, pt.size = 0.5, label.size = 6) + #scale_color_tableau("Tableau 20") +
   theme(text = element_text(size = 20))
DimPlot(obj, group.by = "batch")
DimPlot(obj, group.by = "patient")
DimPlot(obj, group.by = "TYPE")
DimPlot(obj, group.by = "condition")

fig.size(8,16)
DimPlot(obj, group.by = "subclusters")
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`

Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`

Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`

Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`

Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`

Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`

In [ ]:
fig.size(12, 20)
FeaturePlot(obj, c("CD3E", "CD4", "FOXP3", "IL2RA", "CTLA4", "IKZF2", "CD8A", "CXCR6"), ncol = 4, raster=T)
FeaturePlot(obj, c("CD3E", "CD4", "FOXP3", "IL2RA", "CTLA4", "IKZF2", "CD8A", "CXCR6"), ncol = 4, order = T, raster=T)
In [ ]:
fig.size(4,10)
VlnPlot(obj, "CD4", group.by = "seurat_clusters", raster=T)
VlnPlot(obj, "CD8A", group.by = "seurat_clusters", raster=T)
VlnPlot(obj, "CD3E", group.by = "seurat_clusters", raster=T)
VlnPlot(obj, "CD3D", group.by = "seurat_clusters", raster=T)
VlnPlot(obj, "FOXP3", group.by = "seurat_clusters", raster=T)
VlnPlot(obj, "IL2RA", group.by = "seurat_clusters", raster=T)
VlnPlot(obj, "CXCL13", group.by = "seurat_clusters", raster=T)
In [46]:
table(obj$subclusters, obj$seurat_clusters)
                             
                                 0    1    2    3    4    5    6    7    8    9
  Activated NK-like T          107    7    9   10 1034   10 7632    1    1    6
  CD4+ FOXP3+ Tregs              3   38   97   12    3   37    4  113 4327    4
  CD4+ KLRB1+ memory T         303 8761  654   71   40  158   42  527   69   76
  CD4+ naive/central memory T   30 3670 9539  578  253 9681   34  112   32    8
  CD56+bright NK                 1    4    1    0    0    1    8    0    0    0
  CD8+ GZMB+/GZMK+ memory T   7870    6    1   26    1    1   62   12    3 1548
  CD8+ GZMK+ memory T         5853  125  186  105  756   12  286   21    3  215
  CD8+ naive T                 113   40  708  109 8401  278  354    4    0   13
  CXCL13+ TpH                   72 1774  333  161   11  157   18 5327 1209  449
  Cycling Myeloid                0    0    1    0    0    0    8    0    0    0
  Cycling T                      4    7    0    1    2    0   50    1    1    3
  IFNG+ NK                      72    0    0    0    0    0    5    0    0    3
  MAIT cells                     3   33    4    3    0    0    0    0    0    0
  NK-like ILC1                  12    0   14   79    1    0    1    0    0    0
  THEMIS+ IL7R+ ILC            235   73  635 9945   67  197   62   46   82   30
  Vdelta2 yd                    27    4    0    0    1    0   10    0    0    0
                             
                                10   11   12   13
  Activated NK-like T           14    0    0   19
  CD4+ FOXP3+ Tregs              1   80   13    6
  CD4+ KLRB1+ memory T           0    0  131   40
  CD4+ naive/central memory T    3    7  136   64
  CD56+bright NK                 2    0    0    9
  CD8+ GZMB+/GZMK+ memory T      4    0   10   16
  CD8+ GZMK+ memory T           24    0   53   23
  CD8+ naive T                   7    0   57   22
  CXCL13+ TpH                    2   29   90   17
  Cycling Myeloid                0    0    0    0
  Cycling T                      0    0    0    0
  IFNG+ NK                       5    0    0   34
  MAIT cells                     0    0    0    0
  NK-like ILC1                1183    0    0   66
  THEMIS+ IL7R+ ILC            367  990    4  112
  Vdelta2 yd                     0    0    0    5

subset Tregs I¶

In [61]:
#subset 
tregs <- subset(obj, ident = c(8))
tregs
An object of class Seurat 
30803 features across 5727 samples within 1 assay 
Active assay: RNA (30803 features, 2000 variable features)
 3 layers present: counts, data, scale.data
 3 dimensional reductions calculated: pca, harmony, humap

redo preproccessing¶

In [62]:
tregs <- NormalizeData(tregs, normalization.method = "LogNormalize", scale.factor = 10000, verbose = F) %>% 
    FindVariableFeatures(selection.method = "vst", nfeatures = 2000, verbose = F) %>% 
    ScaleData() %>% 
    RunPCA(verbose = F)
Centering and scaling data matrix

In [63]:
set.seed(1)
tregs <- RunHarmony(tregs, group.by.vars=c("batch", "patient", "TYPE"), reduction.use = "pca", 
                 plot_convergence = TRUE, max_iter = 10,
                 early_stop = T)
Transposing data matrix

Initializing state using k-means centroids initialization

Harmony 1/10

Harmony 2/10

Harmony 3/10

Harmony 4/10

Harmony converged after 4 iterations

In [64]:
set.seed(1)
tregs <- Run_uwot_umap(tregs)
tregs <- FindClusters(tregs, graph.name = 'humap_fgraph', resolution = 0.8, verbose = TRUE)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 5727
Number of edges: 69517

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.6827
Number of communities: 9
Elapsed time: 0 seconds
In [65]:
fig.size(8,8)
DimPlot(tregs, label = T, repel = T, pt.size = 0.5, label.size = 6) + 
  scale_color_tableau("Tableau 20") + theme(text = element_text(size = 20))
DimPlot(tregs, group.by = "batch")
DimPlot(tregs, group.by = "patient")
DimPlot(tregs, group.by = "TYPE")
DimPlot(tregs, group.by = "condition")

fig.size(8,16)
DimPlot(tregs, group.by = "subclusters")
In [67]:
fig.size(12, 15)
FeaturePlot(tregs, c("CD3E", "CD4", "FOXP3", "IL2RA", "CTLA4", "IKZF2", "AREG", "TCF7", "CXCR6"), ncol = 3)
In [68]:
fig.size(4,10)
VlnPlot(tregs, "FOXP3", group.by = "seurat_clusters")
VlnPlot(tregs, "IL2RA", group.by = "seurat_clusters")
VlnPlot(tregs, "AREG", group.by = "seurat_clusters")
VlnPlot(tregs, "TCF7", group.by = "seurat_clusters")
In [69]:
# comapre to source annotations
table(tregs$subclusters, tregs$seurat_clusters)
                             
                                0   1   2   3   4   5   6   7   8
  Activated NK-like T           0   0   0   0   0   0   1   0   0
  CD4+ FOXP3+ Tregs           629 903 872 576 802 362 123  35  25
  CD4+ KLRB1+ memory T         27  18   5  12   0   1   6   0   0
  CD4+ naive/central memory T  28   3   0   1   0   0   0   0   0
  CD8+ GZMB+/GZMK+ memory T     0   0   0   0   0   0   3   0   0
  CD8+ GZMK+ memory T           0   0   0   1   0   1   1   0   0
  CXCL13+ TpH                 455 213  99 283   8 103  41   5   2
  Cycling T                     0   0   0   0   1   0   0   0   0
  THEMIS+ IL7R+ ILC            11   0   1   0   2  66   2   0   0

cluster DEGs¶

In [70]:
Tregs.markers <- wilcoxauc(tregs, "seurat_clusters")

options(repr.matrix.max.cols=150, repr.matrix.max.rows=200)

Tregs.markers %>% 
    group_by(group) %>% 
    filter(padj < 0.05 & auc > 0.6) %>% 
    arrange(group, desc(logFC)) %>% 
    #slice_max(n = 40, order_by = logFC)%>% 
    dplyr::select(feature, group) %>% 
    mutate(row = row_number()) %>% 
    tidyr::pivot_wider(names_from = group, values_from = feature) -> Treg.show
Treg.show[1:50,]
A tibble: 50 × 10
row012345678
<int><chr><chr><chr><chr><chr><chr><chr><chr><chr>
1LEF1 KLF2 LGALS1 FOS TNFRSF18 MTRNR2L12CCL5 NR4A1 TRBV7-6
2RPS12 EMP3 HLA-DRB1JUNB TNFRSF4 NEAT1 NKG7 DUSP2 TRBV7-4
3EEF1B2TXNIP S100A4 DUSP1 TIGIT SYNE2 CD8A CD69 TRBV7-2
4TCF7 TLE5 CD74 CREM LINC01943 MT-ATP6 CTSW FOS TRBV7-9
5RPS3A CD52 S100A11 ZNF331 CTLA4 MALAT1 CD8B NFKBID TRAV38-2DV8
6RPS13 LIME1 LGALS3 FOSB BATF MT-CO1 GZMA TNFRSF4 KLRB1
7RPL32 SELPLG S100A6 DUSP2 CTSC ARHGAP15 GZMK ZFP36L1 GZMA
8RPS8 S100A10 VIM ZFP36 CARD16 FOXP1 CST7 CTLA4 STX10
9RPS5 S100A6 CRIP1 DNAJB1 IL2RA RNF213 LAG3 ZFP36 PIK3CG
10RPS18 SH3BGRL3S100A10 TNFAIP3 FOXP3 MT-ND5 GNLY PIM3 CLSTN3
11RPL13 TMSB10 ANXA2 RGCC CD74 MT-CO3 KLRK1 RGCC FBXL14
12RPL30 B2M HLA-DRA CXCR4 SPOCK2 SKAP1 CCL4 EGR3 PRKAB1
13TXNIP HLA-C CLIC1 NR4A2 SRGN CASK GZMB EGR1 NA
14RPL22 RPL14 ANXA1 JUND IL2RB SMCHD1 ZFP36L2 NR4A3 NA
15RPL3 RPL18A UCP2 CD69 TYMP LINC02694ANXA1 PHLDA1 NA
16RPS27 NA HLA-DPA1JUN GAPDH MT-ND6 CD96 BATF NA
17RPL39 NA MYL6 DNAJA1 TNFRSF1B MBNL1 THEMIS TNF NA
18MAL NA TIMP1 LMNA ENO1 MT-CYB CD7 SH2D2A NA
19RPS6 NA EMP3 YPEL5 DUSP4 MT-CO2 GZMM ICOS NA
20CCR7 NA CD99 SLC2A3 CXCR6 PHACTR2 PDE3B KLF6 NA
21RPL21 NA GZMA TENT5C PKM CHST11 AOAH BTG2 NA
22SELL NA HLA-DPB1RGS1 LAYN EPB41 PARP8 NFKBIA NA
23RPS23 NA LY6E PPP1R15ASAT1 MT-ND1 HCST SRGN NA
24RPS21 NA SH3BGRL3BTG2 CST7 LPP PTMS CSRNP1 NA
25RPL34 NA HLA-DRB5DUSP4 TPI1 MT-ND4L GZMH GEM NA
26RPLP2 NA GAPDH SELENOK PHACTR2 USP15 STK17A BIRC3 NA
27RPL18ANA CD2 KLF6 CD2 RABGAP1L S100A6 TNFRSF18 NA
28GIMAP7NA TXN PHLDA1 ACP5 AKAP13 SP100 JUND NA
29RPL5 NA CTSC FTH1 CD7 GLCCI1 LYAR IER2 NA
30RPS29 NA TYMP TSC22D3 IGFLR1 CAMK4 FXYD2 CREM NA
31RPL11 NA CKLF HSPH1 PHLDA1 PDE3B ADGRE5 NR4A2 NA
32RPL9 NA ARPC1B SRGN MAGEH1 DPYD CD99 TNFRSF1B NA
33RPL10ANA COTL1 UBE2S PRDM1 GPRIN3 CBLB DNAJA1 NA
34RPS25 NA IL2RG HSPA8 CACYBP LDLRAD4 CD81 JUNB NA
35RPL35ANA ACTG1 NR4A1 AC017002.3TNIK PRF1 PTPN7 NA
36RPS27ANA ENO1 HSPA1B CD27 IQGAP2 TCF25 GNG2 NA
37RPL29 NA CORO1A HSP90AA1IL2RG MT-ND3 PIK3R5 REL NA
38RPS14 NA PLP2 SOCS1 DNPH1 PIK3R5 PITPNC1 TBC1D4 NA
39RPL36 NA MYL12A BTG1 PGAM1 PACS1 RUNX3 ARID5B NA
40RPL37 NA AHNAK SAMSN1 STK17B KLF12 LITAF NAMPT NA
41RPL19 NA TPM3 TAGAP GBP5 LRBA APOBEC3GRNF19A NA
42KLF2 NA HCST H3F3B LGALS3 PRKCH CYBA HSP90AA1 NA
43RPL36ANA CD52 HSP90AB1ICOS MT-ND4 ITGAE RILPL2 NA
44RPS3 NA PKM CCNH MYL6 MT-ND2 ID2 PPP1R15A NA
45RPL10 NA ACTB NR4A3 CBLB SAMHD1 CTSD NEDD9 NA
46RPL38 NA CXCR3 MYADM IL12RB2 NA CXCR6 NFKB1 NA
47RPL8 NA HLA-DQB1PTPN22 RNF213 NA TLN1 ARF6 NA
48EEF1G NA CYBA IRF1 GBP2 NA MBP MTRNR2L12NA
49RPL12 NA LSP1 HSPE1 PTPRJ NA EMB STAT3 NA
50EEF1A1NA RNF213 NAMPT LAG3 NA GSTP1 RAB8B NA
In [53]:
table(tregs$seurat_clusters)
   0    1    2    3    4    5    6    7    8    9   10   11 
1538 1224 1178 1102 1018  970  576  506  434  233  152  130 

Subset Tregs II¶

consistent with the strategy for other datasets

In [72]:
# 6 is CD8
# 8 is contamination TRBV genes
tregs <- subset(tregs, ident = c(6,8), invert = T)

Final Tregs - Normalization, DimRed, Clustering¶

In [75]:
#initial dimensionality reduction on all samples to identify contaminating clusters
tregs <- NormalizeData(tregs, normalization.method = "LogNormalize", scale.factor = 10000, verbose = F) %>% 
    FindVariableFeatures(selection.method = "vst", nfeatures = 2000, verbose = F) %>% 
    ScaleData() %>% 
    RunPCA(verbose = F)
Centering and scaling data matrix

In [73]:
tregs
An object of class Seurat 
30803 features across 5523 samples within 1 assay 
Active assay: RNA (30803 features, 2000 variable features)
 3 layers present: counts, data, scale.data
 3 dimensional reductions calculated: pca, harmony, humap
In [74]:
tregs@meta.data %>% head
A data.frame: 6 × 50
orig.identnCount_RNAnFeature_RNAcellIDorig.ident.xnCount_RNA.xnFeature_RNA.xorig.ident.ynCount_RNA.ynFeature_RNA.ypercent.mtnCount_ProteinnFeature_ProteinT_clonotype_idT_cdr3s_aaB_clonotype_idB_cdr3s_aascrublet_scoreintegrated_snn_res.0.1integrated_snn_res.0.15seurat_clustersintegrated_snn_res.0.12percent.ribropercent.HbS.ScoreG2M.ScorePhaseold.identpatientTYPEsamplesexbatchageconditionnjointanaglobal1onsetmaxjoint0623label15simple15clusters2312oldclusterslabel15ssubclustersmain_clusterspercent.mitohumap_fgraph_res.1humap_fgraph_res.0.8
<fct><dbl><int><chr><fct><dbl><int><chr><int><int><dbl><int><int><chr><chr><chr><chr><dbl><int><int><fct><int><dbl><dbl><dbl><dbl><chr><chr><int><chr><chr><chr><chr><dbl><chr><int><chr><chr><int><int><chr><chr><chr><chr><chr><chr><chr><dbl><fct><fct>
AAAGATGAGCAGCGTA-1_1JIA_Croft1350 760AAAGATGAGCAGCGTA-1_1JIA_Croft1350 760PBMC11350 7603.1111111340 99clonotype4891TRB:CASSLAIRNYGYTF NANA0.0608695711010.15073200 0.01850598 0.068792887G2MG2M91BloodB91Fbatch16.9P-Oligo1NT cell5181CD4+ TT cellsCD4+ KLRB1+ memory TCD4+ KLRB1+ memoryCD4+ T cellsCD4+ KLRB1+ memory TCD4+ T cells3.111111180
AAAGATGTCCTAGTGA-1_1JIA_Croft1485 773AAAGATGTCCTAGTGA-1_1JIA_Croft1485 773PBMC11485 7731.4814815292 93clonotype77 TRA:CAVRLSWGKLQF NANA0.0816326511110.14954130-0.04059452 0.008258175G2MG2M91BloodB91Fbatch16.9P-Oligo1NT cell5181CD4+ TT cellsCD4+ FOXP3+ Tregs CD4+ FOXP3+ Tregs CD4+ T cellsCD4+ FOXP3+ Tregs CD4+ T cells1.481481581
AAAGTAGCACTAGTAC-1_1JIA_Croft1567 847AAAGTAGCACTAGTAC-1_1JIA_Croft1567 847PBMC11567 8472.1059349330100clonotype1715TRB:CASSSQNRAEQYF NANA0.0526315811010.12984400 0.01361041 0.082733395G2MG2M91BloodB91Fbatch16.9P-Oligo1NT cell5181CD4+ TT cellsCXCL13+ TpH CXCL13+ TpH CD4+ T cellsCXCL13+ TpH CD4+ T cells2.105934980
AACCATGGTCGCATCG-1_1JIA_Croft25891048AACCATGGTCGCATCG-1_1JIA_Croft25891048PBMC1258910480.9269988278 89clonotype751 TRB:CSAEETGPETQYF;TRA:CAVTGGTSYGKLTF NANA0.1562500011010.14895720 0.04661555-0.045203442S S 91BloodB91Fbatch16.9P-Oligo1NT cell5181CD4+ TT cellsCD4+ FOXP3+ Tregs CD4+ FOXP3+ Tregs CD4+ T cellsCD4+ FOXP3+ Tregs CD4+ T cells0.926998880
AACCATGGTTGCTCCT-1_1JIA_Croft20491102AACCATGGTTGCTCCT-1_1JIA_Croft20491102PBMC1204911021.2201074279 90clonotype9315TRB:CASRSTGGGWSNPYEQYF;TRA:CVVNPGRGGAQKLVF NANA0.0391459111110.11044920-0.04041617-0.052987582G1 G1 91BloodB91Fbatch16.9P-Oligo1NT cell5181CD4+ TT cellsCD4+ FOXP3+ Tregs CD4+ FOXP3+ Tregs CD4+ T cellsCD4+ FOXP3+ Tregs CD4+ T cells1.220107481
AACGTTGAGGATGGAA-1_1JIA_Croft47571769AACGTTGAGGATGGAA-1_1JIA_Croft47571769PBMC1475717691.8499054293 92clonotype1286TRB:CASSPILGGPTEAFF;TRB:CSARDPRQGYEQFF;TRA:CAASHNNNDMRF;TRA:CAVMEYGNKLVFNANA0.0949720711010.11857730-0.02773367-0.003626403G1 G1 91BloodB91Fbatch16.9P-Oligo1NT cell5181CD4+ TT cellsCXCL13+ TpH CXCL13+ TpH CD4+ T cellsCXCL13+ TpH CD4+ T cells1.849905480
In [76]:
fig.size(4,4)
set.seed(1)
tregs <- RunHarmony(tregs, group.by.vars=c("batch", "patient", "TYPE"), reduction.use = "pca", 
                 plot_convergence = TRUE, max_iter = 10,
                 early_stop = T)

# ElbowPlot(obj, ndims = 50, reduction = "harmony") 
# ElbowPlot(obj, ndims = 50, reduction = "pca")
Transposing data matrix

Initializing state using k-means centroids initialization

Harmony 1/10

Harmony 2/10

Harmony 3/10

Harmony 4/10

Harmony converged after 4 iterations

In [77]:
set.seed(1)
tregs <- Run_uwot_umap(tregs)
tregs <- FindClusters(tregs, graph.name = 'humap_fgraph', resolution = 0.8, verbose = TRUE)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 5523
Number of edges: 67522

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.6758
Number of communities: 8
Elapsed time: 0 seconds
In [78]:
fig.size(6,6)
DimPlot(tregs, label = T, repel = T, pt.size = 0.5, label.size = 6) + 
  scale_color_tableau("Tableau 20") + theme(text = element_text(size = 20))
DimPlot(tregs, group.by = "batch")
DimPlot(tregs, group.by = "patient")
DimPlot(tregs, group.by = "TYPE")
DimPlot(tregs, group.by = "condition")

fig.size(8,16)
DimPlot(tregs, group.by = "subclusters")
In [79]:
fig.size(12, 12)
FeaturePlot(tregs, c("FOXP3", "IL2RA", "CTLA4", "IKZF2", "CXCR6", "AREG", "MKI67", "TCF7", "CXCL13"), ncol = 3)
FeaturePlot(tregs, order = T, c("FOXP3", "IL2RA", "CTLA4", "IKZF2", "CXCR6", "AREG", "MKI67", "TCF7", "CXCL13"), ncol = 3)
In [80]:
fig.size(5, 30)
FeaturePlot(tregs, order = T, c("FOXP3"), split.by = "seurat_clusters", ncol = 4)
FeaturePlot(tregs, order = F, c("FOXP3"), split.by = "seurat_clusters", ncol = 4)
In [81]:
fig.size(2,10)
VlnPlot(tregs, "FOXP3", group.by = "seurat_clusters")
VlnPlot(tregs, "IL2RA", group.by = "seurat_clusters")
VlnPlot(tregs, "CD4", group.by = "seurat_clusters")
VlnPlot(tregs, "CD8A", group.by = "seurat_clusters")
VlnPlot(tregs, "CXCL13", group.by = "seurat_clusters")

Save Obj.¶

In [ ]:
saveRDS(tregs, "/data/brennerlab/Shani/projects/Treg/analysis/Treg_nonRA/JIA_Croft_Tregs.RDS")
# treg <- readRDS("/data/brennerlab/Shani/projects/Treg/analysis/Treg_nonRA/CD_Tregs.RDS")

Map to RA tregs with symphony¶

In [83]:
ref <- "/data/brennerlab/Shani/projects/Treg/analysis/integrated/integrated_Tregs_SymphonyRef.rds"
tregRef<- readRDS(ref)
In [163]:
query = mapQuery(tregs@assays$RNA@counts,             # query gene expression (genes x cells)
                 tregs@meta.data,        # query metadata (cells x attributes)
                 tregRef,             # Symphony reference object 
                 vars = c("batch", "patient", "TYPE"),           #"PubID", "Site", "Type" # Query batch variables to harmonize over (NULL treats query as one batch)
                 do_normalize = TRUE,  # perform log(CP10k) normalization on query (set to FALSE if already normalized)
                 do_umap = TRUE)        # project query cells into reference UMAP
Normalizing

Scaling and synchronizing query gene expression

Found 1940 out of 2000 reference variable genes in query dataset

Project query cells using reference gene loadings

Clustering query cells to reference centroids

Correcting query batch effects

UMAP

All done!

In [164]:
summary(query)
          Length    Class      Mode   
exp       170124969 dgCMatrix  S4     
meta_data        54 data.frame list   
Z            276150 -none-     numeric
Zq_pca       276150 -none-     numeric
R            552300 -none-     numeric
Xq            44184 -none-     numeric
umap          11046 -none-     numeric
In [165]:
fig.size(4,6)
qp <- query$umap %>% as.data.frame %>% cbind(query$meta_data) %>% 
ggplot(aes(x=UMAP1, y=UMAP2, color=seurat_clusters)) + geom_point(size = 0.2) + 
theme_bw(base_size = 18) 

qp
fig.size(4,10)
qp + facet_wrap(facets = vars(TYPE))
qp + facet_wrap(facets = vars(patient))
In [166]:
# Predict query cell types
query <- knnPredict(query, tregRef, 
                   train_labels = tregRef$meta_data$cell.states, 
                   k = 30, 
                   confidence = TRUE) # calculate prediction confidence
In [167]:
query$meta_data %>% head
A data.frame: 6 × 56
orig.identnCount_RNAnFeature_RNAcellIDorig.ident.xnCount_RNA.xnFeature_RNA.xorig.ident.ynCount_RNA.ynFeature_RNA.ypercent.mtnCount_ProteinnFeature_ProteinT_clonotype_idT_cdr3s_aaB_clonotype_idB_cdr3s_aascrublet_scoreintegrated_snn_res.0.1integrated_snn_res.0.15seurat_clustersintegrated_snn_res.0.12percent.ribropercent.HbS.ScoreG2M.ScorePhaseold.identpatientTYPEsamplesexbatchageconditionnjointanaglobal1onsetmaxjoint0623label15simple15clusters2312oldclusterslabel15ssubclustersmain_clusterspercent.mitohumap_fgraph_res.1humap_fgraph_res.0.8cell.statescell.states.knn.probdonorIDtissuecell_type_pred_knncell_type_pred_knn_prob
<fct><dbl><int><chr><fct><dbl><int><chr><int><int><dbl><int><int><chr><chr><chr><chr><dbl><int><int><fct><int><dbl><dbl><dbl><dbl><chr><chr><int><chr><chr><chr><chr><dbl><chr><int><chr><chr><int><int><chr><chr><chr><chr><chr><chr><chr><dbl><fct><fct><fct><dbl><int><chr><fct><dbl>
AAAGATGAGCAGCGTA-1_1JIA_Croft1350 760AAAGATGAGCAGCGTA-1_1JIA_Croft1350 760PBMC11350 7603.1111111340 99clonotype4891TRB:CASSLAIRNYGYTF NANA0.0608695711010.15073200 0.01850598 0.068792887G2MG2M91BloodB91Fbatch16.9P-Oligo1NT cell5181CD4+ TT cellsCD4+ KLRB1+ memory TCD4+ KLRB1+ memoryCD4+ T cellsCD4+ KLRB1+ memory TCD4+ T cells3.111111180Naive Treg 0.433333391BloodNaive Treg 0.4333333
AAAGATGTCCTAGTGA-1_1JIA_Croft1485 773AAAGATGTCCTAGTGA-1_1JIA_Croft1485 773PBMC11485 7731.4814815292 93clonotype77 TRA:CAVRLSWGKLQF NANA0.0816326511010.14954130-0.04059452 0.008258175G2MG2M91BloodB91Fbatch16.9P-Oligo1NT cell5181CD4+ TT cellsCD4+ FOXP3+ Tregs CD4+ FOXP3+ Tregs CD4+ T cellsCD4+ FOXP3+ Tregs CD4+ T cells1.481481580Naive Treg 0.566666791BloodCD25int Treg0.5666667
AAAGTAGCACTAGTAC-1_1JIA_Croft1567 847AAAGTAGCACTAGTAC-1_1JIA_Croft1567 847PBMC11567 8472.1059349330100clonotype1715TRB:CASSSQNRAEQYF NANA0.0526315811110.12984400 0.01361041 0.082733395G2MG2M91BloodB91Fbatch16.9P-Oligo1NT cell5181CD4+ TT cellsCXCL13+ TpH CXCL13+ TpH CD4+ T cellsCXCL13+ TpH CD4+ T cells2.105934981Naive Treg 0.600000091BloodNaive Treg 0.6333333
AACCATGGTCGCATCG-1_1JIA_Croft25891048AACCATGGTCGCATCG-1_1JIA_Croft25891048PBMC1258910480.9269988278 89clonotype751 TRB:CSAEETGPETQYF;TRA:CAVTGGTSYGKLTF NANA0.1562500011110.14895720 0.04661555-0.045203442S S 91BloodB91Fbatch16.9P-Oligo1NT cell5181CD4+ TT cellsCD4+ FOXP3+ Tregs CD4+ FOXP3+ Tregs CD4+ T cellsCD4+ FOXP3+ Tregs CD4+ T cells0.926998881Naive Treg 0.933333391BloodNaive Treg 0.9333333
AACCATGGTTGCTCCT-1_1JIA_Croft20491102AACCATGGTTGCTCCT-1_1JIA_Croft20491102PBMC1204911021.2201074279 90clonotype9315TRB:CASRSTGGGWSNPYEQYF;TRA:CVVNPGRGGAQKLVF NANA0.0391459111610.11044920-0.04041617-0.052987582G1 G1 91BloodB91Fbatch16.9P-Oligo1NT cell5181CD4+ TT cellsCD4+ FOXP3+ Tregs CD4+ FOXP3+ Tregs CD4+ T cellsCD4+ FOXP3+ Tregs CD4+ T cells1.220107486CD25int Treg0.566666791BloodISG Treg 0.5333333
AACGTTGAGGATGGAA-1_1JIA_Croft47571769AACGTTGAGGATGGAA-1_1JIA_Croft47571769PBMC1475717691.8499054293 92clonotype1286TRB:CASSPILGGPTEAFF;TRB:CSARDPRQGYEQFF;TRA:CAASHNNNDMRF;TRA:CAVMEYGNKLVFNANA0.0949720711110.11857730-0.02773367-0.003626403G1 G1 91BloodB91Fbatch16.9P-Oligo1NT cell5181CD4+ TT cellsCXCL13+ TpH CXCL13+ TpH CD4+ T cellsCXCL13+ TpH CD4+ T cells1.849905481Naive Treg 0.766666791BloodNaive Treg 0.7333333
In [168]:
fig.size(8, 12)
qp <- query$umap %>% as.data.frame %>% cbind(query$meta_data) %>% 
ggplot(aes(x=UMAP1, y=UMAP2, color=cell_type_pred_knn)) + geom_point(size = 0.8) + 
theme_bw(base_size = 18) + guides(colour = guide_legend(override.aes = list(size=2))) + 
scale_color_manual(values = cell.state.colors$cell.states) + 
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +coord_fixed()

qp
fig.size(10,18)
qp + facet_wrap(facets = vars(cell_type_pred_knn))
fig.size(6, 16)
qp + facet_wrap(facets = vars(TYPE))
qp + facet_wrap(facets = vars(batch))
qp + facet_wrap(facets = vars(patient))
qp + facet_wrap(facets = vars(age))
In [169]:
query$meta_data %>% select(subclusters, cell_type_pred_knn, cell_type_pred_knn_prob) %>% arrange(cell_type_pred_knn_prob) %>% head
table(query$meta_data$subclusters, query$meta_data$cell_type_pred_knn)
A data.frame: 6 × 3
subclusterscell_type_pred_knncell_type_pred_knn_prob
<chr><fct><dbl>
GTGCTTCCATCGGACC-1_12CD4+ KLRB1+ memory TAREG+ Treg 0.2333333
ACATACGCAAGCTGAG-1_14CXCL13+ TpH CD25int Treg 0.2333333
TTCGAAGAGCGAAGGG-1_14CD4+ FOXP3+ Tregs CD25highCXCR6+ Treg0.2580645
TCAGCTCTCGTCTGAA-1_10CXCL13+ TpH CD161+mem. Treg 0.2666667
CCAGCGATCTGGCGAC-1_11CXCL13+ TpH CD25high Treg 0.2666667
CATATGGCAGTTTACG-1_14CD4+ FOXP3+ Tregs AREG+ Treg 0.2666667
                             
                              Naive Treg CD25int Treg CD25high Treg
  CD4+ FOXP3+ Tregs                  285          516          1557
  CD4+ KLRB1+ memory T                 8           18            11
  CD4+ naive/central memory T         15            8             0
  CD8+ GZMK+ memory T                  0            0             0
  CXCL13+ TpH                         98          271           226
  Cycling T                            0            0             0
  THEMIS+ IL7R+ ILC                   16           11            17
                             
                              CD25highCXCR6+ Treg AREG+ Treg TNFAIP3+ Treg
  CD4+ FOXP3+ Tregs                           990        212           358
  CD4+ KLRB1+ memory T                          1         15             2
  CD4+ naive/central memory T                   0          8             0
  CD8+ GZMK+ memory T                           0          1             1
  CXCL13+ TpH                                  59        332            94
  Cycling T                                     0          0             0
  THEMIS+ IL7R+ ILC                            20         11             0
                             
                              CD161+mem. Treg ISG Treg GZM Treg Prolif.
  CD4+ FOXP3+ Tregs                        61      179       14       7
  CD4+ KLRB1+ memory T                      5        0        3       0
  CD4+ naive/central memory T               1        0        0       0
  CD8+ GZMK+ memory T                       0        0        0       0
  CXCL13+ TpH                              41       38        6       1
  Cycling T                                 0        0        0       1
  THEMIS+ IL7R+ ILC                         2        2        0       1
In [ ]:
fig.size(8, 12)
qp + facet_wrap(facets = vars(subclusters, TYPE))
# qp + facet_wrap(facets = vars(orig.ident, Layer))
In [171]:
mt<- tregs@meta.data %>% tibble::rownames_to_column("rownames") %>% 
left_join(query$meta_data %>% tibble::rownames_to_column("rownames"), by = join_by(rownames))
mt %>% head
A data.frame: 6 × 111
rownamesorig.ident.x.xnCount_RNA.x.xnFeature_RNA.x.xcellID.xorig.ident.x.x.xnCount_RNA.x.x.xnFeature_RNA.x.x.xorig.ident.y.xnCount_RNA.y.xnFeature_RNA.y.xpercent.mt.xnCount_Protein.xnFeature_Protein.xT_clonotype_id.xT_cdr3s_aa.xB_clonotype_id.xB_cdr3s_aa.xscrublet_score.xintegrated_snn_res.0.1.xintegrated_snn_res.0.15.xseurat_clusters.xintegrated_snn_res.0.12.xpercent.ribro.xpercent.Hb.xS.Score.xG2M.Score.xPhase.xold.ident.xpatient.xTYPE.xsample.xsex.xbatch.xage.xcondition.xnjoint.xana.xglobal1.xonset.xmaxjoint0623.xlabel15.xsimple15.xclusters2312.xoldclusters.xlabel15s.xsubclusters.xmain_clusters.xpercent.mito.xhumap_fgraph_res.1.xhumap_fgraph_res.0.8.xcell.states.xcell.states.knn.prob.xdonorID.xtissue.xorig.ident.y.ynCount_RNA.y.ynFeature_RNA.y.ycellID.yorig.ident.x.ynCount_RNA.x.ynFeature_RNA.x.yorig.ident.y.y.ynCount_RNA.y.y.ynFeature_RNA.y.y.ypercent.mt.ynCount_Protein.ynFeature_Protein.yT_clonotype_id.yT_cdr3s_aa.yB_clonotype_id.yB_cdr3s_aa.yscrublet_score.yintegrated_snn_res.0.1.yintegrated_snn_res.0.15.yseurat_clusters.yintegrated_snn_res.0.12.ypercent.ribro.ypercent.Hb.yS.Score.yG2M.Score.yPhase.yold.ident.ypatient.yTYPE.ysample.ysex.ybatch.yage.ycondition.ynjoint.yana.yglobal1.yonset.ymaxjoint0623.ylabel15.ysimple15.yclusters2312.yoldclusters.ylabel15s.ysubclusters.ymain_clusters.ypercent.mito.yhumap_fgraph_res.1.yhumap_fgraph_res.0.8.ycell.states.ycell.states.knn.prob.ydonorID.ytissue.ycell_type_pred_knncell_type_pred_knn_prob
<chr><fct><dbl><int><chr><fct><dbl><int><chr><int><int><dbl><int><int><chr><chr><chr><chr><dbl><int><int><fct><int><dbl><dbl><dbl><dbl><chr><chr><int><chr><chr><chr><chr><dbl><chr><int><chr><chr><int><int><chr><chr><chr><chr><chr><chr><chr><dbl><fct><fct><fct><dbl><int><chr><fct><dbl><int><chr><fct><dbl><int><chr><int><int><dbl><int><int><chr><chr><chr><chr><dbl><int><int><fct><int><dbl><dbl><dbl><dbl><chr><chr><int><chr><chr><chr><chr><dbl><chr><int><chr><chr><int><int><chr><chr><chr><chr><chr><chr><chr><dbl><fct><fct><fct><dbl><int><chr><fct><dbl>
1AAAGATGAGCAGCGTA-1_1JIA_Croft1350 760AAAGATGAGCAGCGTA-1_1JIA_Croft1350 760PBMC11350 7603.1111111340 99clonotype4891TRB:CASSLAIRNYGYTF NANA0.0608695711010.15073200 0.01850598 0.068792887G2MG2M91BloodB91Fbatch16.9P-Oligo1NT cell5181CD4+ TT cellsCD4+ KLRB1+ memory TCD4+ KLRB1+ memoryCD4+ T cellsCD4+ KLRB1+ memory TCD4+ T cells3.111111180Naive Treg 0.433333391BloodJIA_Croft1350 760AAAGATGAGCAGCGTA-1_1JIA_Croft1350 760PBMC11350 7603.1111111340 99clonotype4891TRB:CASSLAIRNYGYTF NANA0.0608695711010.15073200 0.01850598 0.068792887G2MG2M91BloodB91Fbatch16.9P-Oligo1NT cell5181CD4+ TT cellsCD4+ KLRB1+ memory TCD4+ KLRB1+ memoryCD4+ T cellsCD4+ KLRB1+ memory TCD4+ T cells3.111111180Naive Treg 0.433333391BloodNaive Treg 0.4333333
2AAAGATGTCCTAGTGA-1_1JIA_Croft1485 773AAAGATGTCCTAGTGA-1_1JIA_Croft1485 773PBMC11485 7731.4814815292 93clonotype77 TRA:CAVRLSWGKLQF NANA0.0816326511010.14954130-0.04059452 0.008258175G2MG2M91BloodB91Fbatch16.9P-Oligo1NT cell5181CD4+ TT cellsCD4+ FOXP3+ Tregs CD4+ FOXP3+ Tregs CD4+ T cellsCD4+ FOXP3+ Tregs CD4+ T cells1.481481580Naive Treg 0.566666791BloodJIA_Croft1485 773AAAGATGTCCTAGTGA-1_1JIA_Croft1485 773PBMC11485 7731.4814815292 93clonotype77 TRA:CAVRLSWGKLQF NANA0.0816326511010.14954130-0.04059452 0.008258175G2MG2M91BloodB91Fbatch16.9P-Oligo1NT cell5181CD4+ TT cellsCD4+ FOXP3+ Tregs CD4+ FOXP3+ Tregs CD4+ T cellsCD4+ FOXP3+ Tregs CD4+ T cells1.481481580Naive Treg 0.566666791BloodCD25int Treg0.5666667
3AAAGTAGCACTAGTAC-1_1JIA_Croft1567 847AAAGTAGCACTAGTAC-1_1JIA_Croft1567 847PBMC11567 8472.1059349330100clonotype1715TRB:CASSSQNRAEQYF NANA0.0526315811110.12984400 0.01361041 0.082733395G2MG2M91BloodB91Fbatch16.9P-Oligo1NT cell5181CD4+ TT cellsCXCL13+ TpH CXCL13+ TpH CD4+ T cellsCXCL13+ TpH CD4+ T cells2.105934981Naive Treg 0.600000091BloodJIA_Croft1567 847AAAGTAGCACTAGTAC-1_1JIA_Croft1567 847PBMC11567 8472.1059349330100clonotype1715TRB:CASSSQNRAEQYF NANA0.0526315811110.12984400 0.01361041 0.082733395G2MG2M91BloodB91Fbatch16.9P-Oligo1NT cell5181CD4+ TT cellsCXCL13+ TpH CXCL13+ TpH CD4+ T cellsCXCL13+ TpH CD4+ T cells2.105934981Naive Treg 0.600000091BloodNaive Treg 0.6333333
4AACCATGGTCGCATCG-1_1JIA_Croft25891048AACCATGGTCGCATCG-1_1JIA_Croft25891048PBMC1258910480.9269988278 89clonotype751 TRB:CSAEETGPETQYF;TRA:CAVTGGTSYGKLTF NANA0.1562500011110.14895720 0.04661555-0.045203442S S 91BloodB91Fbatch16.9P-Oligo1NT cell5181CD4+ TT cellsCD4+ FOXP3+ Tregs CD4+ FOXP3+ Tregs CD4+ T cellsCD4+ FOXP3+ Tregs CD4+ T cells0.926998881Naive Treg 0.933333391BloodJIA_Croft25891048AACCATGGTCGCATCG-1_1JIA_Croft25891048PBMC1258910480.9269988278 89clonotype751 TRB:CSAEETGPETQYF;TRA:CAVTGGTSYGKLTF NANA0.1562500011110.14895720 0.04661555-0.045203442S S 91BloodB91Fbatch16.9P-Oligo1NT cell5181CD4+ TT cellsCD4+ FOXP3+ Tregs CD4+ FOXP3+ Tregs CD4+ T cellsCD4+ FOXP3+ Tregs CD4+ T cells0.926998881Naive Treg 0.933333391BloodNaive Treg 0.9333333
5AACCATGGTTGCTCCT-1_1JIA_Croft20491102AACCATGGTTGCTCCT-1_1JIA_Croft20491102PBMC1204911021.2201074279 90clonotype9315TRB:CASRSTGGGWSNPYEQYF;TRA:CVVNPGRGGAQKLVF NANA0.0391459111610.11044920-0.04041617-0.052987582G1 G1 91BloodB91Fbatch16.9P-Oligo1NT cell5181CD4+ TT cellsCD4+ FOXP3+ Tregs CD4+ FOXP3+ Tregs CD4+ T cellsCD4+ FOXP3+ Tregs CD4+ T cells1.220107486CD25int Treg0.566666791BloodJIA_Croft20491102AACCATGGTTGCTCCT-1_1JIA_Croft20491102PBMC1204911021.2201074279 90clonotype9315TRB:CASRSTGGGWSNPYEQYF;TRA:CVVNPGRGGAQKLVF NANA0.0391459111610.11044920-0.04041617-0.052987582G1 G1 91BloodB91Fbatch16.9P-Oligo1NT cell5181CD4+ TT cellsCD4+ FOXP3+ Tregs CD4+ FOXP3+ Tregs CD4+ T cellsCD4+ FOXP3+ Tregs CD4+ T cells1.220107486CD25int Treg0.566666791BloodISG Treg 0.5333333
6AACGTTGAGGATGGAA-1_1JIA_Croft47571769AACGTTGAGGATGGAA-1_1JIA_Croft47571769PBMC1475717691.8499054293 92clonotype1286TRB:CASSPILGGPTEAFF;TRB:CSARDPRQGYEQFF;TRA:CAASHNNNDMRF;TRA:CAVMEYGNKLVFNANA0.0949720711110.11857730-0.02773367-0.003626403G1 G1 91BloodB91Fbatch16.9P-Oligo1NT cell5181CD4+ TT cellsCXCL13+ TpH CXCL13+ TpH CD4+ T cellsCXCL13+ TpH CD4+ T cells1.849905481Naive Treg 0.766666791BloodJIA_Croft47571769AACGTTGAGGATGGAA-1_1JIA_Croft47571769PBMC1475717691.8499054293 92clonotype1286TRB:CASSPILGGPTEAFF;TRB:CSARDPRQGYEQFF;TRA:CAASHNNNDMRF;TRA:CAVMEYGNKLVFNANA0.0949720711110.11857730-0.02773367-0.003626403G1 G1 91BloodB91Fbatch16.9P-Oligo1NT cell5181CD4+ TT cellsCXCL13+ TpH CXCL13+ TpH CD4+ T cellsCXCL13+ TpH CD4+ T cells1.849905481Naive Treg 0.766666791BloodNaive Treg 0.7333333
In [172]:
## Add umap and cell state predictions to seurat obj.
tregs@reductions[["umap"]] <- CreateDimReducObject(embeddings = query$umap[rownames(tregs@meta.data),], key = "UMAP_", assay = DefaultAssay(tregs))
tregs <- AddMetaData(tregs, mt$cell_type_pred_knn, "cell.states")
tregs <- AddMetaData(tregs, mt$cell_type_pred_knn_prob, "cell.states.knn.prob")

Save Symphony obj¶

In [75]:
# saveRDS(query, "/data/brennerlab/Shani/projects/Treg/analysis/Treg_nonRA/JIA_Croft_Tregs_symphony.RDS")
# saveRDS(tregs, "/data/brennerlab/Shani/projects/Treg/analysis/Treg_nonRA/JIA_Croft_Tregs_symphony_Seurat.RDS")


tregs <- readRDS("/data/brennerlab/Shani/projects/Treg/analysis/Treg_nonRA/JIA_Croft_Tregs_symphony_Seurat.RDS")
query <- readRDS( "/data/brennerlab/Shani/projects/Treg/analysis/Treg_nonRA/JIA_Croft_Tregs_symphony.RDS")
In [6]:
tregs
An object of class Seurat 
30803 features across 5523 samples within 1 assay 
Active assay: RNA (30803 features, 2000 variable features)
 3 layers present: counts, data, scale.data
 4 dimensional reductions calculated: pca, harmony, humap, umap

Further exploration¶

In [10]:
query$meta_data %>% names()
  1. 'orig.ident'
  2. 'nCount_RNA'
  3. 'nFeature_RNA'
  4. 'cellID'
  5. 'orig.ident.x'
  6. 'nCount_RNA.x'
  7. 'nFeature_RNA.x'
  8. 'orig.ident.y'
  9. 'nCount_RNA.y'
  10. 'nFeature_RNA.y'
  11. 'percent.mt'
  12. 'nCount_Protein'
  13. 'nFeature_Protein'
  14. 'T_clonotype_id'
  15. 'T_cdr3s_aa'
  16. 'B_clonotype_id'
  17. 'B_cdr3s_aa'
  18. 'scrublet_score'
  19. 'integrated_snn_res.0.1'
  20. 'integrated_snn_res.0.15'
  21. 'seurat_clusters'
  22. 'integrated_snn_res.0.12'
  23. 'percent.ribro'
  24. 'percent.Hb'
  25. 'S.Score'
  26. 'G2M.Score'
  27. 'Phase'
  28. 'old.ident'
  29. 'patient'
  30. 'TYPE'
  31. 'sample'
  32. 'sex'
  33. 'batch'
  34. 'age'
  35. 'condition'
  36. 'njoint'
  37. 'ana'
  38. 'global1'
  39. 'onset'
  40. 'maxjoint0623'
  41. 'label15'
  42. 'simple15'
  43. 'clusters2312'
  44. 'oldclusters'
  45. 'label15s'
  46. 'subclusters'
  47. 'main_clusters'
  48. 'percent.mito'
  49. 'humap_fgraph_res.1'
  50. 'humap_fgraph_res.0.8'
  51. 'cell.states'
  52. 'cell.states.knn.prob'
  53. 'donorID'
  54. 'tissue'
  55. 'cell_type_pred_knn'
  56. 'cell_type_pred_knn_prob'
In [24]:
fig.size(6,6)
count.groups <- query$meta_data %>% group_by(patient) %>% mutate(totalP = n()) %>% ungroup()%>% 
select(cell_type_pred_knn, TYPE, patient, totalP) %>% 
group_by(cell_type_pred_knn, TYPE, patient) %>% mutate(nType = n()) %>%
mutate(propType = nType/ totalP) %>% ungroup() %>% 
group_by(cell_type_pred_knn, TYPE) %>% summarize(prop.average = mean(propType))
count.groups
ggplot(count.groups, aes(x = TYPE, y = prop.average, fill = cell_type_pred_knn)) +
       geom_col(position = "fill") + scale_fill_manual(values = cell.state.colors.v2$cell.states) + theme_base(base_size = 20)
`summarise()` has grouped output by 'cell_type_pred_knn'. You can override using the `.groups`
argument.
A grouped_df: 29 × 3
cell_type_pred_knnTYPEprop.average
<fct><chr><dbl>
Naive Treg Blood 0.098614599
Naive Treg SF 0.009955783
Naive Treg Tissue0.016194000
CD25int Treg Blood 0.084877277
CD25int Treg SF 0.098925497
CD25int Treg Tissue0.112756653
CD25high Treg Blood 0.049476911
CD25high Treg SF 0.290763208
CD25high Treg Tissue0.200442745
CD25highCXCR6+ TregBlood 0.002112659
CD25highCXCR6+ TregSF 0.191614706
CD25highCXCR6+ TregTissue0.158955875
AREG+ Treg Blood 0.005190914
AREG+ Treg SF 0.023143855
AREG+ Treg Tissue0.205168262
TNFAIP3+ Treg SF 0.006382039
TNFAIP3+ Treg Tissue0.135917068
CD161+mem. Treg Blood 0.008174522
CD161+mem. Treg SF 0.015064627
CD161+mem. Treg Tissue0.017886946
ISG Treg Blood 0.006153404
ISG Treg SF 0.070800637
ISG Treg Tissue0.007396221
GZM Treg Blood 0.004656989
GZM Treg SF 0.004756350
GZM Treg Tissue0.007300054
Prolif. Blood 0.001792115
Prolif. SF 0.002680202
Prolif. Tissue0.005463594
In [28]:
fig.size(6,6)
count.groups <- query$meta_data %>% select(cell_type_pred_knn, condition) %>% group_by(cell_type_pred_knn, condition) %>% count()


fig.size(6,6)
count.groups <- query$meta_data %>% group_by(patient) %>% mutate(totalP = n()) %>% ungroup()%>% 
select(cell_type_pred_knn, condition, patient, totalP) %>% 
group_by(cell_type_pred_knn, condition, patient) %>% mutate(nType = n()) %>%
mutate(propType = nType/ totalP) %>% ungroup() %>% 
group_by(cell_type_pred_knn, condition) %>% summarize(prop.average = mean(propType))
 
ggplot(count.groups, aes(x = condition, y = prop.average, fill = cell_type_pred_knn)) +
    geom_col(position = "fill") + scale_fill_manual(values = cell.state.colors.v2$cell.states) + 
    theme_base(base_size = 20) + theme(axis.text.x = element_text(angle = 45, hjust = 1))

ggplot(count.groups, aes(x = condition, y = prop.average, fill = cell_type_pred_knn)) +
    geom_col(position = "stack") + scale_fill_manual(values = cell.state.colors.v2$cell.states) + 
    theme_base(base_size = 20) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
`summarise()` has grouped output by 'cell_type_pred_knn'. You can override using the `.groups`
argument.
In [26]:
table(query$meta_data$condition, query$meta_data$patient)
          
            84  88  91  95  96  97  98  99 811 814 817 910
  Ex-Oligo   0   0   0   0   0   0   0   0   0   0   0 552
  P-Oligo    0   0 558 592   0   0 820 270 281   0 496   0
  Poly-neg 563   0   0   0   0 263   0   0   0   0   0   0
  PsA        0   0   0   0 194   0   0   0   0 238   0   0
  RF+Poly    0 696   0   0   0   0   0   0   0   0   0   0
In [27]:
table(query$meta_data$cell_type_pred_knn, query$meta_data$patient)
                     
                       84  88  91  95  96  97  98  99 811 814 817 910
  Naive Treg           50  38  73  90   4  42  54   2   9  33  16  11
  CD25int Treg        139  88 100 146  22  29  68  22  31  48  33  98
  CD25high Treg       183 207 200 171  96  59 337  74 185  44 168  87
  CD25highCXCR6+ Treg  23 195 102  96  40  85 202  57  36  45 156  33
  AREG+ Treg           84  74   5  56  16   7  24  61   2  24  31 195
  TNFAIP3+ Treg        41  66   1   7   4   3  68  47   2  34  64 118
  CD161+mem. Treg       4  11  12  11   8   8  18   3   9   5  16   5
  ISG Treg             37  16  60   9   2  30  45   1   6   5   7   1
  GZM Treg              1   0   4   6   0   0   2   2   1   0   3   4
  Prolif.               1   1   1   0   2   0   2   1   0   0   2   0
In [177]:
obj$condition %>% unique()
  1. 'P-Oligo'
  2. 'Poly-neg'
  3. 'RF+Poly'
  4. 'PsA'
  5. 'Ex-Oligo'
In [76]:
fig.size(6,6)
query$meta_data <- query$meta_data %>% group_by(patient, T_clonotype_id) %>% mutate(cloneSize = n()) %>% 
mutate(cloneSize = if_else(is.na(T_clonotype_id), 0, cloneSize))%>% 
ungroup()
In [77]:
qp <- query$umap %>% as.data.frame %>% cbind(query$meta_data) %>% arrange(cloneSize) %>% 
ggplot(aes(x=UMAP1, y=UMAP2, color=as.factor(cloneSize))) + geom_point(size = 0.1, position = ) + 
theme_bw(base_size = 18) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +coord_fixed()


qp
In [83]:
metadata$cloneSize %>% table
.
   0    1    2    3    4    5    6    7    8    9   12   23   26 
 340 3603  822  297  136  100   48   49   40   27   12   23   26 
In [92]:
fig.size(6,8)
metadata <- query$meta_data %>% 
mutate(cloneCategory = ifelse(is.na(T_clonotype_id), "No TCR information",
                                ifelse(cloneSize == "1", "Singlet",
                                ifelse(cloneSize %in% c(2:3), "Small (1 < X <= 3)",
                                       ifelse(cloneSize %in% c(4:10), "Medium (3 < X <= 10)",
                                            ifelse(cloneSize %in% c(11:30), "Large (10 < X <= 30)", 
                                                     "Hyperexpanded (30 < X)")))))) %>%
    mutate(clonal = ifelse(cloneSize > 1, "Clonal", "Not Clonal"))
# metadata$cloneCategory <- tidyr::replace_na(metadata$cloneCategory, "No TCR information")


metadata$cloneCategory <- factor(metadata$cloneCategory, levels = c("Hyperexpanded (30 < X)", 
                                                                "Large (10 < X <= 30)", 
                                                                "Medium (3 < X <= 10)", 
                                                                "Small (1 < X <= 3)", 
                                                                "Singlet", 
                                                                "No TCR information"))

category.order <- c("Hyperexpanded (30 < X)", "Large (10 < X <= 30)",
                  "Medium (3 < X <= 10)","Small (1 < X <= 3)",
                  "Singlet", "No TCR information", "No TCR data")

qp <- query$umap %>% as.data.frame %>% cbind(metadata) %>%  
ggplot(aes(x=UMAP1, y=UMAP2, color=cloneCategory)) + geom_point(size = 1) + 
theme_bw(base_size = 18) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +coord_fixed() +
scale_color_manual(values = colors.no.info)


qp
In [59]:
fig.size(5,8)
metadata %>%
    group_by(cell_type_pred_knn, cloneCategory) %>%
    dplyr::count() %>%
    group_by(cell_type_pred_knn) %>%
    mutate(Percent=100*n/sum(n)) %>%
    ungroup() %>%
    ggplot(aes(x=cell_type_pred_knn, y=Percent, fill=cloneCategory)) +
        geom_col() +
        # scale_fill_brewer(palette = "Blues", direction = -1, name = "Clone Type") +
        scale_fill_manual(values = colors.no.info) +
        scale_y_continuous(labels = function(x) paste0(x, "%")) +
        ggtitle(NULL) +
        labs(fill = "Sample") +
        xlab("cell states") + ylab(NULL) +
        theme_classic() + 
        theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
              legend.text = element_text(size=12),
              legend.title = element_text(size = 14),
            plot.margin = margin(t = 0, r = 0, b = 0, l = 2, unit = "cm"))  


metadata %>%
    group_by(TYPE, cloneCategory) %>%
    dplyr::count() %>%
    group_by(TYPE) %>%
    mutate(Percent=100*n/sum(n)) %>%
    ungroup() %>%
    ggplot(aes(x=TYPE, y=Percent, fill=cloneCategory)) +
        geom_col() +
        # scale_fill_brewer(palette = "Blues", direction = -1, name = "Clone Type") +
        scale_fill_manual(values = colors.no.info) +
        scale_y_continuous(labels = function(x) paste0(x, "%")) +
        ggtitle(NULL) +
        labs(fill = "Sample") +
        xlab("cell states") + ylab(NULL) +
        theme_classic() + 
        theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
              legend.text = element_text(size=12),
              legend.title = element_text(size = 14),
            plot.margin = margin(t = 0, r = 0, b = 0, l = 2, unit = "cm"))  
# merged@meta.data %>%
#     group_by(tissue, cloneCategory) %>%
#     dplyr::count() %>%
#     group_by(tissue) %>%
#     mutate(Percent=100*n/sum(n)) %>%
#     ungroup() %>%
#     ggplot(aes(x=tissue,y=Percent, fill=cloneCategory)) +
#         geom_col() +
#         # scale_fill_brewer(palette = "Blues", direction = -1, name = "Clone Type") +
#         scale_fill_manual(values = colors.no.info) +
#         scale_y_continuous(labels = function(x) paste0(x, "%")) +
#         ggtitle(NULL) +
#         labs(fill = "Sample") +
#         xlab("tissue") + ylab(NULL) +
#         theme_classic() + 
#         theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
#               legend.text = element_text(size=12),
#               legend.title = element_text(size = 14))
In [61]:
fig.size(8, 12)
#getting clones for each cluster
# unique.states <- unique(merged$cell.states)
unique.tissue <- unique(metadata$tissue)
all.states.clones <- list()
for (s in unique.tissue){
        a <- metadata %>% filter(tissue == s) %>% 
    select(T_clonotype_id) %>% na.omit() %>% distinct(T_clonotype_id)
        all.states.clones[s] = a
    }

#using the upsetR package
# all.states.clones 
UpSetR::upset(UpSetR::fromList(all.states.clones), sets = unique.tissue, keep.order = F, 
              mainbar.y.label = "Number of Overlapping Clones", text.scale = 2,
              sets.x.label = "Unique Clones in tissue", order.by = "freq")
In [65]:
fig.size(8, 18)
#getting clones for each cluster
# unique.states <- unique(merged$cell.states)
unique.tissue <- unique(metadata$cell_type_pred_knn)
all.states.clones <- list()
for (s in unique.tissue){
        a <- metadata %>% filter(cell_type_pred_knn == s) %>% 
    select(T_clonotype_id) %>% na.omit() %>% distinct(T_clonotype_id)
        all.states.clones[s] = a
    }

#using the upsetR package
# all.states.clones 
UpSetR::upset(UpSetR::fromList(all.states.clones), sets = unique.tissue, keep.order = F, 
              mainbar.y.label = "Number of Overlapping Clones", text.scale = 2,
              sets.x.label = "Unique Clones in cell.state", order.by = "freq")

Markers expression¶

In [ ]:
1
In [30]:
fig.size(6,10)
Idents(tregs) <- "cell.states"
DimPlot(tregs, reduction = "umap", cols = cell.state.colors.v2$cell.states, pt.size = 0.8)
In [31]:
fig.size(8, 12)
query$umap %>% as.data.frame %>% 
cbind(query$meta_data) %>% 
ggplot(aes(x=UMAP1, y=UMAP2, color=cell.states)) + geom_point() + 
scale_color_manual(values = cell.state.colors.v2$cell.states) +
theme_bw(base_size = 18) + guides(colour = guide_legend(override.aes = list(size=2))) + 
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + coord_fixed()
In [179]:
fig.size(3,4)
FeaturePlot(tregs, features = "IL2RA", reduction = "umap") + coord_fixed()
FeaturePlot(tregs, features = "FOXP3", reduction = "umap") + coord_fixed()
FeaturePlot(tregs, features = "AREG", reduction = "umap") + coord_fixed()
FeaturePlot(tregs, features = "CXCR6", reduction = "umap") + coord_fixed()
FeaturePlot(tregs, features = "CXCR4", reduction = "umap") + coord_fixed()
FeaturePlot(tregs, features = "CXCL13", reduction = "umap") + coord_fixed()
FeaturePlot(tregs, features = "TCF7", reduction = "umap") + coord_fixed()
FeaturePlot(tregs, features = "NR3C1", reduction = "umap") + coord_fixed()
FeaturePlot(tregs, features = "NR4A2", reduction = "umap") + coord_fixed()
In [120]:
#non expressed - EGF, EPGN, NRG3, 
ligands <- c("VEGF", "AREG", "TGFA", "HBEGF", "EREG", "BTC", "NRG4", "NRG1", "NRG2")
ligands2 <- c("NR3C1", "NR4A2", "NR4A1")
In [122]:
fig.size(10,10)
FeaturePlot(tregs, features = ligands, reduction = "umap") 

fig.size(3,10)
FeaturePlot(tregs, features = ligands2, reduction = "umap", ncol =3)
Warning message:
“The following requested variables were not found: VEGF”
In [128]:
fig.size(5,15)
FeaturePlot(tregs, features = "AREG", reduction = "umap", split.by = "TYPE")
In [123]:
Tregs.markers <- wilcoxauc(tregs, "cell.states")

options(repr.matrix.max.cols=150, repr.matrix.max.rows=200)

Tregs.markers %>% 
    group_by(group) %>% 
    filter(padj < 0.05 & auc > 0.6) %>% 
    arrange(group, desc(logFC)) %>% 
    #slice_max(n = 40, order_by = logFC)%>% 
    dplyr::select(feature, group) %>% 
    mutate(row = row_number()) %>% 
    tidyr::pivot_wider(names_from = group, values_from = feature) -> Treg.show
Treg.show[1:50,]
A tibble: 50 × 11
rowAREG+ TregCD161+mem. TregCD25high TregCD25highCXCR6+ TregCD25int TregGZM TregISG TregNaive TregProlif.TNFAIP3+ Treg
<int><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr>
1FOS AQP3 LGALS1 TNFRSF18 RPS29 GZMK ISG15 LEF1 DEK CREM
2JUNB GIMAP7 S100A4 TNFRSF4 IFITM1CCL5 MX1 EEF1B2STMN1 JUNB
3DUSP1 GPR25 LGALS3 CTSC RPS12 GZMA IFI6 TCF7 TUBA1B ZFP36
4ZNF331 CD52 S100A6 LINC01943LEF1 IFNG-AS1 LY6E RPS3A BCAP31 FOSB
5JUND S100A11 S100A10 CTLA4 RPS27 GIMAP5 IFIT1 RPS12 NME2 DUSP2
6FOSB KLRB1 ANXA2 BATF TXNIP DENR ISG20 RPL3 DUT RGCC
7JUN CNN2 EMP3 CD74 RPL32 PRKCQ-AS1HERC5 RPS6 NDUFA6 DUSP1
8CREM FOXP1 CD74 TIGIT RPL30 GLMN EPSTI1 TXNIP TXN ZNF331
9ZFP36 GIMAP4 HLA-DRB1SPOCK2 RPL13 TIMM22 XAF1 RPL13 CCDC167 FOS
10DUSP2 NOSIP S100A11 PKM RPS8 CHI3L2 STAT1 RPS5 YBX1 TNFAIP3
11NR4A2 GIMAP5 CRIP1 CXCR6 RPS18 ACACA OAS1 RPS8 GAPDH CXCR4
12CD69 TRAF3IP3 ISG20 IL2RA RPS21 SREBF1 IFITM1 RPL32 SUMO3 DNAJB1
13TNFAIP3 EMP3 HLA-DPA1CD2 RPS3A KRBOX4 MX2 RPS13 PFN1 LMNA
14RGCC TLE5 SELPLG TYMP RPL34 NA SP100 SELL C17orf49NR4A2
15DNAJB1 LIMS1 VIM GAPDH RPS13 NA IFI16 CCR7 PRDX3 JUND
16CXCR4 SLC25A5 HLA-DPB1ENO1 RPS28 NA OASL RPL10AGSTP1 JUN
17SLC2A3 RPLP0 LY6E FOXP3 RPL8 NA SMCHD1 RPS18 TWF2 PHLDA1
18KLF6 COTL1 CD52 TNFRSF1B RPL10 NA IRF7 RPLP2 DCTPP1 DNAJA1
19DNAJA1 KLF3 CLIC1 HLA-DRB1 RPL3 NA TRIM22 EEF1G CDT1 CD69
20BTG2 ARPC1B CD99 LGALS3 RPL18ANA EIF2AK2RPL5 PHF19 PPP1R15A
21YPEL5 LAPTM5 SYNE2 SRGN RPL11 NA RNF213 RPL9 OVCA2 DUSP4
22PPP1R15A ACTG1 SH3BGRL3CARD16 RPL39 NA SAMD9 GIMAP7ACTB YPEL5
23RGS1 MGAT4A LIME1 PGAM1 RPL36 NA CMPK2 RPL29 PSMA3 TSC22D3
24SOCS1 RCSD1 IL32 RNF213 RPS3 NA IFI44L FOXP1 MDH1 TENT5C
25TENT5C RPL7 TLE5 LGALS1 RPLP2 NA UBE2L6 RPS23 TRBV6-1 SELENOK
26ZFP36L2 IKZF3 LSP1 TPI1 RPL22 NA SLFN5 RPL21 HLA-DMA HSPH1
27TSC22D3 RPSA CYBA IL2RB RPL21 NA DRAP1 RPL4 SNRPE KLF6
28FTH1 MSN TMSB10 IL2RG RPL14 NA LGALS9 RPL7 RBPJ BTG2
29UBE2S RIPOR2 HLA-A CST7 RPL19 NA SP110 RPL22 MCM5 SLC2A3
30SELENOK COX6C HLA-C MYL6 RPS23 NA SAMD9L RPL18APSMB2 RGS1
31NR3C1 CCR4 PFN1 LAG3 RPS6 NA TAP1 RPS21 MCM7 FTH1
32HSPA1B ABRACL ARHGDIB DUSP4 RPS27ANA PARP9 RPL13APDAP1 NR4A1
33RGS2 S1PR4 TMSB4X PHACTR2 EEF1B2NA BST2 EEF1A1COA3 SRGN
34HSP90AA1 CYTH1 B2M TIMP1 RPL41 NA IFIT3 RPL30 SUCLG1 SAMSN1
35HSPH1 RORA NA CKLF RPL12 NA OAS3 RPL36APPIA HSP90AA1
36TAGAP CCND3 NA IGFLR1 RPL5 NA USP18 RPL34 MRPL22 UBE2S
37SRGN RPL4 NA PRDM1 RPL10ANA SPATS2LRPS27ADDX49 SOCS1
38TSPYL2 CDC25B NA LAYN RPL37 NA IFI35 RPL38 SLBP MYADM
39FAAH2 CD6 NA PTPRJ RPS15ANA IFI44 RPS2 CISD1 BTG1
40CCNH MKRN1 NA S100A4 RPL29 NA OAS2 GAS5 RPA3 CSRNP1
41H3F3B MVP NA DNPH1 FAU NA SYNE2 RPL39 CLSPN NR4A3
42NR4A1 SYTL1 NA ACP5 RPS14 NA ADAR RPS3 PTTG1 NAMPT
43PHLDA1 PLSCR3 NA IL12RB2 RPS5 NA TYMP RPS14 SLC35E4 HSPA1B
44GEM ITGB7 NA CD4 RPL38 NA RSAD2 RPL19 CMC1 CCNH
45TUBB4B GIMAP1 NA CBLB RPS25 NA ZBP1 RPL36 POLR3K PTPN22
46LMNA HMOX2 NA CACYBP FTL NA PSMB8 LDHB PCLAF FOSL2
47MTRNR2L12AC004687.1NA SH2D2A RPL35ANA USP15 RPL37 APOBEC3GHSPA8
48DDIT4 TC2N NA TXN RPL26 NA GBP1 RPS20 NABP2 IRF1
49EIF1 CDC123 NA GBP5 RPL18 NA FOXP3 RPL23ALIG1 NFKBIA
50SOCS3 TMSB10 NA UCP2 RPS7 NA DDX60L PABPC1MRPS7 HSP90AB1

Metabolism Genes¶

In [129]:
mg <- read.csv("/data/brennerlab/Shani/projects/Treg/useful_tables/metabolism_genes_manually_curated.csv")
mg %>% head
A data.frame: 6 × 3
GlycolysisFAOOXPHOS
<chr><chr><chr>
1HK1 ACADVLPDHA1
2 HK2 ACADM DLST
3 PFKP HADHA SDHA
4 PGAM1HADHB CS
5 ENO1 CPT1A ACO1
6 PKM CPT2 IDH1/2/3
In [130]:
fig.size(4,8)
FeaturePlot(tregs, features = mg$Glycolysis, reduction = "umap") + coord_fixed()
Warning message:
“The following requested variables were not found:  HK2,  PFKP,  PGAM1,  ENO1,  PKM,  LDHA,  PDHA1,  TPI1,  GAPDH”
In [131]:
fig.size(8, 12)
FeaturePlot(tregs, features = mg$FAO, reduction = "umap")
Warning message:
“The following requested variables were not found: ”
In [132]:
fig.size(6, 10)
FeaturePlot(tregs, features = mg$OXPHOS, reduction = "umap")
Warning message:
“The following requested variables were not found: ACO1 , IDH1/2/3, OGDH , SUCLA2/SUCLG1/SUCLG2, FH , MDH1/2, DLD ”
In [ ]:

Try harmonizid CD and RA Tregs¶

In [133]:
RA <- readRDS("/data/brennerlab/Shani/projects/Treg/analysis/integrated/integrated.Tregs.rds")
In [135]:
length(rownames(RA))
length(rownames(tregs))

inters <- intersect(rownames(RA), rownames(tregs))
length(inters)
38224
30803
30803
In [140]:
tregs@meta.data <- tregs@meta.data %>% mutate(donorID = patient) %>% mutate(tissue = TYPE)
In [141]:
merged <- merge(RA[inters,], tregs[inters,])
In [142]:
merged
An object of class Seurat 
30803 features across 24432 samples within 1 assay 
Active assay: RNA (30803 features, 0 variable features)
 2 layers present: counts, data
In [143]:
merged <- NormalizeData(merged, normalization.method = "LogNormalize", scale.factor = 10000, verbose = F) %>% 
    FindVariableFeatures(selection.method = "vst", nfeatures = 2000, verbose = F) %>% 
    ScaleData() %>% 
    RunPCA(verbose = F)
Centering and scaling data matrix

In [147]:
unique(merged$orig.ident)
unique(merged$tissue)
  1. 'AMP2'
  2. 'AMPrep'
  3. 'SF.BL'
  4. 'HD_Luo'
  5. 'JIA_Croft'
  1. 'RA.Syn.Tissue'
  2. 'RA.PBMC'
  3. 'RA.Syn.Fluid'
  4. 'HD.PBMC'
  5. 'Blood'
  6. 'SF'
  7. 'Tissue'
In [145]:
fig.size(4,4)
set.seed(1)
merged <- RunHarmony(merged, group.by.vars=c("orig.ident", "donorID", "tissue"), reduction.use = "pca", 
                 plot_convergence = TRUE, max_iter = 10,
                 early_stop = T)
Transposing data matrix

Initializing state using k-means centroids initialization

Harmony 1/10

Harmony 2/10

Harmony 3/10

Harmony converged after 3 iterations

In [146]:
set.seed(1)
merged <- Run_uwot_umap(merged)
merged <- FindClusters(merged, graph.name = 'humap_fgraph', resolution = 0.8, verbose = TRUE)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 24432
Number of edges: 304539

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.7036
Number of communities: 11
Elapsed time: 2 seconds
In [148]:
merged$tissue <- factor(merged$tissue, levels = c("HD.PBMC", "RA.PBMC", "RA.Syn.Fluid", "RA.Syn.Tissue", 'Blood', 'SF', 'Tissue'))
In [149]:
fig.size(8,8)
DimPlot(merged, label = T, repel = T, pt.size = 0.5, label.size = 6) + 
  scale_color_tableau("Tableau 20") + theme(text = element_text(size = 20))
DimPlot(merged, group.by = "donorID")
DimPlot(merged, group.by = "tissue")
DimPlot(merged, group.by = "orig.ident")

DimPlot(merged, group.by = "cell.states") + scale_color_manual(values = cell.state.colors$cell.states)
In [150]:
fig.size(6, 20)
DimPlot(merged, group.by = "cell.states", split.by = "tissue") + scale_color_manual(values = cell.state.colors$cell.states)
In [ ]:

Plot RA Treg reference¶

In [116]:
fig.size(8, 12)
tregRef$umap$embedding %>% as.data.frame %>% 
cbind(tregRef$meta_data) %>% filter(tissue != "HD.PBMC") %>% 
ggplot(aes(x=UMAP1, y=UMAP2, color=cell.states)) + geom_point() + 
scale_color_manual(values = cell.state.colors$cell.states) +
theme_bw(base_size = 18) + guides(colour = guide_legend(override.aes = list(size=2))) + 
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + coord_fixed()
In [84]:
fig.size(12,16)
tregRef$umap$embedding %>% as.data.frame %>% 
cbind(tregRef$meta_data) %>% 
ggplot(aes(x=UMAP1, y=UMAP2, color=cell.states)) + geom_point() + 
scale_color_manual(values = cell.state.colors$cell.states) + facet_wrap(~tissue) +
theme_bw(base_size = 18) + guides(colour = guide_legend(override.aes = list(size=2))) + 
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + coord_fixed()